Import the weather data.

# Load the `weather_df` dataset into the workspace.
# NOTE(review): the package that ships this dataset must already be attached;
# that setup is not visible in this section -- confirm.
data("weather_df")

Making our first plot

# First scatterplot: tmax (y) against tmin (x), one point per row.
ggplot(weather_df, aes(x = tmin, y = tmax)) +
  geom_point()
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

Another way to do it is with the pipe operator (`%>%`)

# Same scatterplot, this time piping the data frame into ggplot().
weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_point()
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Keep the scatterplot in an object so it can be printed (or saved) later.
# `<-` is used for assignment, following standard R style.
ggp_weather_scatterplot <-
  weather_df %>%
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point()

# Printing the stored object renders the plot.
ggp_weather_scatterplot
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

Check which rows have missing values (these are the rows dropped in the warnings above).

# List the rows whose tmax is missing.
filter(weather_df, is.na(tmax))
## # A tibble: 17 × 6
##    name         id          date        prcp  tmax  tmin
##    <chr>        <chr>       <date>     <dbl> <dbl> <dbl>
##  1 Molokai_HI   USW00022534 2022-05-31    NA    NA    NA
##  2 Waterhole_WA USS0023B17S 2021-03-09    NA    NA    NA
##  3 Waterhole_WA USS0023B17S 2021-12-07    51    NA    NA
##  4 Waterhole_WA USS0023B17S 2021-12-31     0    NA    NA
##  5 Waterhole_WA USS0023B17S 2022-02-03     0    NA    NA
##  6 Waterhole_WA USS0023B17S 2022-08-09    NA    NA    NA
##  7 Waterhole_WA USS0023B17S 2022-08-10    NA    NA    NA
##  8 Waterhole_WA USS0023B17S 2022-08-11    NA    NA    NA
##  9 Waterhole_WA USS0023B17S 2022-08-12    NA    NA    NA
## 10 Waterhole_WA USS0023B17S 2022-08-13    NA    NA    NA
## 11 Waterhole_WA USS0023B17S 2022-08-14    NA    NA    NA
## 12 Waterhole_WA USS0023B17S 2022-08-15    NA    NA    NA
## 13 Waterhole_WA USS0023B17S 2022-08-16    NA    NA    NA
## 14 Waterhole_WA USS0023B17S 2022-08-17    NA    NA    NA
## 15 Waterhole_WA USS0023B17S 2022-08-18    NA    NA    NA
## 16 Waterhole_WA USS0023B17S 2022-08-19    NA    NA    NA
## 17 Waterhole_WA USS0023B17S 2022-12-31    76    NA    NA

Fancier scatterplots!

Note that `aes()` maps variables to aesthetics of the points, while `alpha` sets their opacity.

# Color by station name; the color mapping is set in ggplot(), so it applies
# to both the semi-transparent points and the smooth curves.
ggplot(weather_df, aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.3) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Print the tibble to inspect its columns and first rows.
weather_df
## # A tibble: 2,190 × 6
##    name           id          date        prcp  tmax  tmin
##    <chr>          <chr>       <date>     <dbl> <dbl> <dbl>
##  1 CentralPark_NY USW00094728 2021-01-01   157   4.4   0.6
##  2 CentralPark_NY USW00094728 2021-01-02    13  10.6   2.2
##  3 CentralPark_NY USW00094728 2021-01-03    56   3.3   1.1
##  4 CentralPark_NY USW00094728 2021-01-04     5   6.1   1.7
##  5 CentralPark_NY USW00094728 2021-01-05     0   5.6   2.2
##  6 CentralPark_NY USW00094728 2021-01-06     0   5     1.1
##  7 CentralPark_NY USW00094728 2021-01-07     0   5    -1  
##  8 CentralPark_NY USW00094728 2021-01-08     0   2.8  -2.7
##  9 CentralPark_NY USW00094728 2021-01-09     0   2.8  -4.3
## 10 CentralPark_NY USW00094728 2021-01-10     0   5    -1.6
## # ℹ 2,180 more rows

Where you define aesthetics can matter

# Color and size are mapped only inside geom_point(), so geom_smooth() does
# not inherit them and draws a single curve over all stations.
weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_point(mapping = aes(color = name, size = prcp), alpha = 0.3) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Print the tibble again to confirm the plotting code did not alter it.
weather_df
## # A tibble: 2,190 × 6
##    name           id          date        prcp  tmax  tmin
##    <chr>          <chr>       <date>     <dbl> <dbl> <dbl>
##  1 CentralPark_NY USW00094728 2021-01-01   157   4.4   0.6
##  2 CentralPark_NY USW00094728 2021-01-02    13  10.6   2.2
##  3 CentralPark_NY USW00094728 2021-01-03    56   3.3   1.1
##  4 CentralPark_NY USW00094728 2021-01-04     5   6.1   1.7
##  5 CentralPark_NY USW00094728 2021-01-05     0   5.6   2.2
##  6 CentralPark_NY USW00094728 2021-01-06     0   5     1.1
##  7 CentralPark_NY USW00094728 2021-01-07     0   5    -1  
##  8 CentralPark_NY USW00094728 2021-01-08     0   2.8  -2.7
##  9 CentralPark_NY USW00094728 2021-01-09     0   2.8  -4.3
## 10 CentralPark_NY USW00094728 2021-01-10     0   5    -1.6
## # ℹ 2,180 more rows

Use faceting real quick: it works by splitting the plot into multiple panels arranged in a rows-by-columns grid

# One panel per station, arranged as columns (`. ~ name`).
ggplot(weather_df, aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = 0.3, size = 0.3) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Print the tibble (same data as before; shown for reference).
weather_df
## # A tibble: 2,190 × 6
##    name           id          date        prcp  tmax  tmin
##    <chr>          <chr>       <date>     <dbl> <dbl> <dbl>
##  1 CentralPark_NY USW00094728 2021-01-01   157   4.4   0.6
##  2 CentralPark_NY USW00094728 2021-01-02    13  10.6   2.2
##  3 CentralPark_NY USW00094728 2021-01-03    56   3.3   1.1
##  4 CentralPark_NY USW00094728 2021-01-04     5   6.1   1.7
##  5 CentralPark_NY USW00094728 2021-01-05     0   5.6   2.2
##  6 CentralPark_NY USW00094728 2021-01-06     0   5     1.1
##  7 CentralPark_NY USW00094728 2021-01-07     0   5    -1  
##  8 CentralPark_NY USW00094728 2021-01-08     0   2.8  -2.7
##  9 CentralPark_NY USW00094728 2021-01-09     0   2.8  -4.3
## 10 CentralPark_NY USW00094728 2021-01-10     0   5    -1.6
## # ℹ 2,180 more rows
# Same plot, but with the station panels stacked as rows (`name ~ .`).
ggplot(weather_df, aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = 0.3, size = 0.3) +
  geom_smooth(se = FALSE) +
  facet_grid(name ~ .)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Print the tibble once more for reference.
weather_df
## # A tibble: 2,190 × 6
##    name           id          date        prcp  tmax  tmin
##    <chr>          <chr>       <date>     <dbl> <dbl> <dbl>
##  1 CentralPark_NY USW00094728 2021-01-01   157   4.4   0.6
##  2 CentralPark_NY USW00094728 2021-01-02    13  10.6   2.2
##  3 CentralPark_NY USW00094728 2021-01-03    56   3.3   1.1
##  4 CentralPark_NY USW00094728 2021-01-04     5   6.1   1.7
##  5 CentralPark_NY USW00094728 2021-01-05     0   5.6   2.2
##  6 CentralPark_NY USW00094728 2021-01-06     0   5     1.1
##  7 CentralPark_NY USW00094728 2021-01-07     0   5    -1  
##  8 CentralPark_NY USW00094728 2021-01-08     0   2.8  -2.7
##  9 CentralPark_NY USW00094728 2021-01-09     0   2.8  -4.3
## 10 CentralPark_NY USW00094728 2021-01-10     0   5    -1.6
## # ℹ 2,180 more rows

Let's make a somewhat more interesting scatterplot

# Color and shape by station, point size by prcp, one panel per station.
ggplot(weather_df, aes(x = tmin, y = tmax, color = name, shape = name)) +
  geom_point(aes(size = prcp), alpha = 0.3) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_point()`).

Learning Assessment: Write a code chain that starts with weather_df; focuses only on Central Park, converts temperatures to Fahrenheit, makes a scatterplot of min vs. max temperature, and overlays a linear regression line (using options in geom_smooth())

This R code is taking a dataset of weather observations and producing a scatterplot of daily minimum versus maximum temperatures for Central Park, New York. It begins with weather_df and uses the pipe operator %>% to pass the data through a sequence of transformations. The first step filters the dataset so that only rows where the variable name equals "CentralPark_NY" remain. Next, mutate() creates two new variables—tmax_fahr and tmin_fahr—by converting the maximum and minimum daily temperatures from Celsius to Fahrenheit using the formula °F = (°C × 9/5) + 32. With these new variables in place, the code calls ggplot() to set up a graph in which tmin_fahr is mapped to the x-axis and tmax_fahr to the y-axis. The geom_point(alpha = 0.5) layer then plots the data as semi-transparent points, which helps visualize overlapping values. Finally, geom_smooth(method = "lm", se = FALSE) adds a linear regression line to the plot without showing the shaded confidence interval.

# Central Park only: convert both temperatures from Celsius to Fahrenheit,
# then plot min vs. max with a linear fit and no confidence band.
weather_df %>%
  filter(name == "CentralPark_NY") %>%
  mutate(
    tmax_fahr = tmax * (9 / 5) + 32,
    tmin_fahr = tmin * (9 / 5) + 32
  ) %>%
  ggplot(mapping = aes(x = tmin_fahr, y = tmax_fahr)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

## Small things

# Smooth curves only; the points layer is deliberately disabled:
#   geom_point(alpha = 0.3) +
ggplot(weather_df, aes(x = tmin, y = tmax, color = name, shape = name)) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).

# Layers are drawn in the order they are added: here the smooths are drawn
# first and the points are drawn on top of them.
ggplot(weather_df, aes(x = tmin, y = tmax, color = name, shape = name)) +
  geom_smooth(se = FALSE) +
  geom_point()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Hexagonal binning of tmin vs. tmax instead of individual points.
ggplot(weather_df, aes(x = tmin, y = tmax)) +
  geom_hex()
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_binhex()`).

# A fixed color goes outside aes(): every point is drawn green.
ggplot(weather_df, aes(x = tmin, y = tmax)) +
  geom_point(color = "green")
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Univariate plots

# Histogram of tmin with green bars and white bar outlines.
ggplot(weather_df, aes(x = tmin)) +
  geom_histogram(color = "white", fill = "green")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Histogram of tmin filled by station, with one row of panels per station.
ggplot(weather_df, aes(x = tmin, fill = name)) +
  geom_histogram() +
  facet_grid(name ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_bin()`).

Maybe a density plot?

# Overlaid tmin densities per station; low alpha keeps the overlap visible.
ggplot(weather_df, aes(x = tmin, fill = name)) +
  geom_density(alpha = 0.2)
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_density()`).

Box plot

# Boxplot of tmin for each station, with boxes filled by station.
ggplot(weather_df, aes(x = name, y = tmin)) +
  geom_boxplot(mapping = aes(fill = name))
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

violin plots

# Violin plot of tmin for each station, filled by station.
ggplot(weather_df, aes(x = name, y = tmin, fill = name)) +
  geom_violin()
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

ridge plot

# Ridge plot: one tmin density per station, stacked along the y axis.
ggplot(weather_df, aes(x = tmin, y = name, fill = name)) +
  geom_density_ridges()
## Picking joint bandwidth of 1.41
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

Learning assessment (univariate plots): Make plots that compare precipitation across locations. Try a histogram, a density plot, a boxplot, a violin plot, and a ridge plot; use aesthetic mappings to make your figure readable.

Density Plot

# Density of prcp by station, restricted to rows with 5 < prcp < 1000.
weather_df %>%
  filter(prcp > 5, prcp < 1000) %>%
  ggplot(mapping = aes(x = prcp, fill = name)) +
  geom_density()

# Density of prcp by station using every row (no filtering).
ggplot(weather_df, aes(x = prcp, fill = name)) +
  geom_density()
## Warning: Removed 15 rows containing non-finite outside the scale range
## (`stat_density()`).

Ridge plot

# Ridge plot of prcp by station, with ridge height scaled by 0.85.
ggplot(weather_df, aes(x = prcp, y = name, fill = name)) +
  geom_density_ridges(scale = 0.85)
## Picking joint bandwidth of 9.22
## Warning: Removed 15 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

Boxplot

# Boxplot of prcp for each station.
ggplot(weather_df, aes(x = name, y = prcp)) +
  geom_boxplot()
## Warning: Removed 15 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Saving and embedding plots

saving plots

# Build the violin plot of tmin by station and keep it in an object so it
# can be saved to disk. `<-` is used for assignment, following standard R style.
ggp_weather_violin <-
  weather_df %>%
  ggplot(aes(x = name, y = tmin, fill = name)) +
  geom_violin()

# Printing the stored object renders the plot.
ggp_weather_violin
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

# Save the stored violin plot as a PDF with width 8 and height 6 (ggsave()'s
# default units). The save fails (or prompts) when the target directory is
# missing, so make sure "plots/" exists first; dir.create() is a no-op apart
# from a suppressed warning when the directory is already there.
dir.create("plots", showWarnings = FALSE)
ggsave(
  "plots/violin_plot.pdf",
  plot = ggp_weather_violin,
  width = 8,
  height = 6
)
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_ydensity()`).